# -*- coding: utf-8 -*-
import random
import scrapy
from scrapy import Request, FormRequest
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings
from zc_core.util.http_util import retry_request
from zc_core.model.items import Box
from zc_core.dao.batch_dao import BatchDao
from henan.rules import *
from zc_core.spiders.base import BaseSpider


class FullSpider(BaseSpider):
    """Full crawl: suppliers and catalogs first, then SKU listings.

    Request flow, as implemented in this class:

    1. ``start_requests`` fetches ``supplier_page``.
    2. ``parse_catalog_supplier`` emits the supplier and catalog boxes and
       requests the SKU frame page of every level-3 catalog.
    3. ``parse_price_ladder`` resolves the price ladders of one catalog and
       requests page 1 of the SKU list for each ladder.
    4. ``parse_sku_page`` handles page 1 and fans out pages 2..total.
    5. ``parse_sku_data`` handles each of those follow-up pages.

    The lower-case ``parse_*`` functions and the ``*Helper`` classes used
    below are not defined in this module; presumably they are provided by
    ``from henan.rules import *`` -- confirm there for their exact contracts.
    """

    name = 'full'

    # Frequently used URLs.
    supplier_page = 'http://222.143.21.205:8081/onlineRetailers/list'
    sku_frame_url = 'http://222.143.21.205:8081/category/products?pmbh={}'
    sku_list_url = 'http://222.143.21.205:8081/category/list?pmbh={}&sort=0&order=asc&ppbh=&pageNo={}&jiage={}&gysmc=&xhmc='

    def __init__(self, batchNo=None, *args, **kwargs):
        """Create the batch record and load the crawl settings.

        :param batchNo: batch number forwarded to ``BaseSpider``; this class
            afterwards reads the resolved value from ``self.batch_no``.
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        settings = get_project_settings()
        # Create the batch record.
        BatchDao().create_batch(self.batch_no)
        # Indexed by catalog id in parse_price_ladder, so it must be a
        # mapping; fall back to an empty dict when unset or empty.
        self.special_ladders = settings.get('SPECIAL_LADDER', {}) or {}
        # getint() so that a string override still compares against the
        # integer page total.
        self.sku_page_limit = settings.getint('SKU_PAGE_LIMIT', 150)

        # Init helpers. The instances are discarded, so these calls are made
        # only for whatever the constructors do (not visible in this file).
        SupplierHelper()
        CatalogHelper()
        SpuHelper()

    def start_requests(self):
        """Yield the single entry request for the supplier/catalog page."""
        yield Request(
            url=self.supplier_page,
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
            },
            callback=self.parse_catalog_supplier,
            errback=self.error_back,
            dont_filter=True
        )

    def parse_catalog_supplier(self, response):
        """Emit supplier/catalog boxes and request each level-3 catalog.

        Yields a ``Box('supplier', ...)`` and a ``Box('catalog', ...)`` when
        the respective parser returns something truthy, followed by one POST
        request per level-3 catalog, in random order.
        """
        # Supplier list.
        suppliers = parse_supplier(response)
        if suppliers:
            self.logger.info('供应商: count[%s]', len(suppliers))
            yield Box('supplier', self.batch_no, suppliers)

        # Catalog list.
        cats = parse_catalog(response)
        if not cats:
            # Nothing to crawl; also avoids shuffling/iterating None.
            return
        self.logger.info('品类: count[%s]', len(cats))
        yield Box('catalog', self.batch_no, cats)

        # Shuffle a separate list of ids rather than ``cats`` itself, which
        # has already been handed over inside the catalog box.
        level3_ids = [cat.get('catalogId') for cat in cats if cat.get('level') == 3]
        random.shuffle(level3_ids)
        for cat3_id in level3_ids:
            yield FormRequest(
                method='POST',
                url=self.sku_frame_url.format(cat3_id),
                callback=self.parse_price_ladder,
                errback=self.error_back,
                meta={
                    'reqType': 'sku',
                    'batchNo': self.batch_no,
                    'catalogId': cat3_id,
                },
                dont_filter=True
            )

    def parse_price_ladder(self, response):
        """Resolve the price ladders of one catalog and request page 1 of each.

        A ladder configured in ``SPECIAL_LADDER`` for this catalog takes
        precedence over the ladders parsed from the response.
        """
        cat3_id = response.meta.get('catalogId')
        if cat3_id in self.special_ladders:
            ladders = self.special_ladders[cat3_id]
            self.logger.info('指定阶梯: cat[%s]', cat3_id)
        else:
            ladders = parse_price_ladder(response)
            self.logger.info('价格阶梯：cat=%s, cnt=%s', cat3_id, ladders)

        # Page 1 of every ladder; its response also carries the page total.
        for ladder in ladders or ():
            yield self._sku_list_request(cat3_id, ladder, 1, self.parse_sku_page)

    def parse_sku_page(self, response):
        """Handle page 1 of a SKU list and request the remaining pages."""
        meta = response.meta
        cat3_id = meta.get('catalogId')
        ladder = meta.get('ladder')

        # Items of the first page.
        for box in self._collect_sku_boxes(response):
            yield box

        # Total number of pages.
        total = parse_sku_page(response)
        if not total:
            return
        if total > self.sku_page_limit:
            # NOTE(review): the limit is only reported, not enforced -- all
            # pages are still requested below. Confirm that this is intended.
            self.logger.info('页数超限：cat=%s, ladder=%s, total=%s', cat3_id, ladder, total)
        self.logger.info('页数：cat=%s, ladder=%s, total=%s', cat3_id, ladder, total)

        # Follow-up page requests.
        for page in range(2, total + 1):
            yield self._sku_list_request(cat3_id, ladder, page, self.parse_sku_data)

    def parse_sku_data(self, response):
        """Handle a follow-up SKU list page (page 2 and later)."""
        for box in self._collect_sku_boxes(response):
            yield box

    def _sku_list_request(self, cat3_id, ladder, page, callback):
        """Build the POST request for one page of a catalog/ladder SKU list."""
        return FormRequest(
            method='POST',
            url=self.sku_list_url.format(cat3_id, page, ladder),
            callback=callback,
            errback=self.error_back,
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
                'catalogId': cat3_id,
                'ladder': ladder,
                'page': page,
            },
            dont_filter=True
        )

    def _collect_sku_boxes(self, response):
        """Return the sku/item boxes of a list page, or ``[]`` if it is empty.

        The page counts as empty unless both the SKU list and the item list
        are truthy; either way one log line is written.
        """
        meta = response.meta
        cur_page = meta.get('page')
        cat3_id = meta.get('catalogId')
        ladder = meta.get('ladder')

        sku_list, item_list = parse_sku_data(response)
        if sku_list and item_list:
            self.logger.info('清单：cat=%s, ladder=%s, page=%s, cnt=%s', cat3_id, ladder, cur_page, len(sku_list))
            return [
                Box('sku', self.batch_no, sku_list),
                Box('item', self.batch_no, item_list),
            ]

        # ``sku_list`` may be None here, so do not call len() on it directly.
        self.logger.info('分页为空: cat=%s, ladder=%s, page=%s, cnt=%s', cat3_id, ladder, cur_page, len(sku_list or ()))
        return []
